source(here("GET_VARS.R"))
files <- dir(here("ballet_990_released_20230208"),
full.names = TRUE)
comp <- map_df(files, ~get_df(filename = .x, schedule = "j"))
comp_clean <- comp %>%
rename_with(.cols= everything(),
~gsub('/Return/ReturnData/IRS990ScheduleJ/', '', .)) %>%
select(-contains("Ind")) %>%
select(fiscal_year, EIN,
contains("RltdOrgOfficerTrstKeyEmplGrp")) %>%
# only extract cols within the RltdOrgOfficerTrstKeyEmplGrp
select(EIN, fiscal_year,
matches("RltdOrgOfficerTrstKeyEmplGrp\\[.*.\\]/")) %>%
pivot_longer(-c(EIN,fiscal_year)) %>%
mutate(id = gsub("\\D", "", name),
# name_old = name,
name = gsub(".*./", "", name),
id = gsub("990", "", id))
comp_clean <- comp_clean %>%
filter(!is.na(value)) %>%
distinct() %>%
pivot_wider(names_from = name, values_from = value)
comp_clean <- comp_clean %>%
mutate(across(contains("Amt"), as.numeric))%>%
mutate(TitleTxt=tolower(TitleTxt))
saveRDS(comp_clean, here("data", "schedj.RDS"))
# clean up title text field because it was free text in the form 990
comp_clean <- comp_clean %>%
mutate(TitleTxt = gsub("dancer/choreographer",
"dancer / choreographer",
TitleTxt),
TitleTxt = gsub("vp", "Vice President", TitleTxt),
TitleTxt = gsub("dorector", "director",TitleTxt),
title_clean = case_when(
grepl("ceo", TitleTxt, ignore.case = TRUE ) ~"CEO",
grepl("cfo", TitleTxt, ignore.case = TRUE)~ "Chief Financial Officer",
grepl("executive dir", TitleTxt, ignore.case = TRUE) ~"Executive Director",
grepl("artistic dir",TitleTxt, ignore.case = TRUE) ~"Artistic Director",
grepl("emeritus|emerita", TitleTxt, ignore.case = TRUE) ~"Emirita/Emiritus Position",
grepl( "chief dev",TitleTxt, ignore.case=TRUE) &
grepl("officer",TitleTxt, ignore.case = TRUE) ~"Chief Development Officer",
grepl("director of market|marketing director",TitleTxt, ignore.case = TRUE) ~ "Director of Marketing",
grepl("music director",TitleTxt, ignore.case = TRUE) ~"Music Director",
grepl("mktg", TitleTxt, ignore.case = TRUE ) &
grepl("officer|ofc", TitleTxt, ignore.case = TRUE ) ~ "Marketing Officer",
grepl("Director of Development",TitleTxt, ignore.case = TRUE) ~ "Director of Development",
grepl("chief",TitleTxt, ignore.case = TRUE) &
grepl("officer",TitleTxt, ignore.case = TRUE) ~ "Other Chief Officer",
grepl("Dir of Legal",TitleTxt, ignore.case = TRUE) ~"Director of Legal Affairs",
grepl("Former Senior Dir", TitleTxt, ignore.case = TRUE) ~ "Former Senior Director",
grepl("Director|Dir", TitleTxt, ignore.case = TRUE) ~ "Other Director",
grepl("Director", TitleTxt, ignore.case = TRUE) ~ "Other Director",
TRUE ~ TitleTxt
))
Number of Individuals with Title
# number of individuals with title
comp_clean %>%
mutate(title_clean=tolower(title_clean)) %>%
filter(!is.na(title_clean)) %>%
group_by(title_clean) %>%
summarize(`Number of Individuals in Position` = n()) %>%
arrange(desc(`Number of Individuals in Position`))
# missingness by variable
# comp_clean %>%
# select(-c(EIN,fiscal_year,id)) %>%
# is.na() %>%
# colSums() %>%
# as_tibble(rownames="Variable") %>%
# mutate(`Not Missing` = nrow(comp_clean) - value) %>%
# select(-value)
comp_clean %>%
group_by(title_clean) %>%
mutate(m = median(BaseCompensationFilingOrgAmt, na.rm= TRUE)) %>%
filter(!is.na(title_clean)) %>%
ungroup() %>%
ggplot(aes(x=fct_reorder(title_clean,m),
y = BaseCompensationFilingOrgAmt)) +
geom_jitter(alpha = .5, size = .5, height = 0, width = .05) +
coord_flip() +
theme_bw() +
labs(title = "Compensation by Title",
x = "Title")+
theme(plot.title = element_text(size = 18,
hjust = .5, face="bold",),
plot.subtitle = element_text(hjust = .5,
face="italic",
size = 16),
axis.title = element_text(size = 13,
face = "bold"),
axis.text.x= element_text(size = 8))

plt <- comp_clean %>%
group_by(EIN, fiscal_year) %>%
summarize(total_compensation = sum(BaseCompensationFilingOrgAmt)) %>%
group_by(EIN) %>%
mutate(m = median(total_compensation, na.rm= TRUE)) %>%
ungroup() %>%
# group_by(EIN) %>%
mutate(tile = ntile(m,2),
tilename = ifelse(tile == 1,
"EINs Below the Median",
"EINs Above the Median"),
tilename = factor(tilename, levels = c( "EINs Below the Median",
"EINs Above the Median"))) %>%
ggplot(aes(x=fiscal_year,
y = total_compensation,
color = EIN,
group = EIN)) +
geom_line() +
geom_point() +
labs(title = "Compensation to Highest Paid Employees",
subtitle = "Total Compensation to Highest Paid Employees By EIN",
y = "Total Compensation",
x = "Fiscal Year")+
viridis::scale_color_viridis(discrete=TRUE,
option = "rocket",
end = .9) +
theme_bw()+
theme(plot.title = element_text(size = 18,
hjust = .5, face="bold",),
plot.subtitle = element_text(hjust = .5,
face="italic",
size = 16),
axis.title = element_text(size = 13,
face = "bold")) +
facet_wrap(~tile, scales = "free_y") +
scale_y_continuous(labels = comma)
ggplotly(plt,height = 500, width =850) %>%
layout(margin = m)
# plot compensation versus beginning year balance by fiscal year
comp <- comp_clean %>%
mutate(fiscal_year = as.numeric(paste(fiscal_year))) %>%
left_join(endowment_data_wide) %>%
group_by(EIN, fiscal_year, BeginningYearBalanceAmt, organization_name) %>%
summarize(total_compensation = sum(BaseCompensationFilingOrgAmt))
plt <- comp %>%
ggplot(aes(x=BeginningYearBalanceAmt,
y = total_compensation,
color = EIN)) +
geom_point() +
facet_wrap(~fiscal_year, nrow = 2)+
theme_bw()+
theme(plot.title = element_text(size = 18,
hjust = .5, face="bold",),
plot.subtitle = element_text(hjust = .5,
face="italic",
size = 16),
axis.title = element_text(size = 13,
face = "bold"))+
viridis::scale_color_viridis(discrete=TRUE,
option = "magma",
end = .9) +
labs(title = "Total Compensation to Highest Paid Employees\nby Beginning of Year Balance",
x = "Beginning of Year Balance",
y = "Total Compensation")
ggplotly(plt, height = 500, width = 850) %>%
layout(margin = m)
# logged scales
plt <- comp %>%
ggplot(aes(x=BeginningYearBalanceAmt,
y = total_compensation,
color = EIN)) +
geom_point() +
facet_wrap(~fiscal_year, nrow = 2)+
theme_bw()+
theme(plot.title = element_text(size = 18,
hjust = .5, face="bold",),
plot.subtitle = element_text(hjust = .5,
face="italic",
size = 16),
axis.title = element_text(size = 13,
face = "bold"))+
viridis::scale_color_viridis(discrete=TRUE,
option = "magma",
end = .9) +
scale_x_log10() +
scale_y_log10()
plot_ranks("BeginningYearBalanceAmt",
"total_compensation", data = comp )